*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do files uses cluster generated in C++ (lines 2-17 of Algorithm 1)  to 
* complete the deliniation of prime locations (lines 18-34 of Algorithm 1)
* This do file is run for on clusters generated based on big data establishments
* with employment weights estimated from the first half of cities

* Employment type identifier to 0 for big data establishment employment with weights from first half of cities
	global ET = 0

* Create temporary space for output
	capture mkdir "$temp/grid_PL_output_ET${ET}"	
	
* Loop over CBSAs
	qui u "$data_USMETROS/Raw Numeric Data/METRO LIST/METROS.dta", clear	
	
* Sample for weights creation from the first half of the alphabet (overlapping sample of USA MSAs and Global Cities)
* We predict employment for the second half
	local USMETROIDS 33460 34980  35620  36740  37980  38060 38300  38900  39300  40140  40900 41180  41700  41740  41860  41940 42660  45300  47260  47900 	
	
* Sample for weights creation from the first half of the alphabet includes the median 33460, hence it is included in the below list of cities for which we predict employment
	local count = 1
	* Begin loop by metro 
			foreach USmid of local USMETROIDS{	
			di "...Working on CBSA `USmid', number `count' of `Ncbsafp'..."
			u "$dataoutput/USMetroGridEmp/EmpType_${ET}/EmpGrid_`USmid'", clear	// Using baseline clusters (total emp, standard p-value)
		
			* Fix special cases ************************************************
				 qui sum clusterID if clusterID != . // If there are no clusters
					local MaxCluster = r(max) 
					qui sum grid_x 
					local mingridX = round(r(min),0)
					qui sum grid_y if grid_x ==  `mingridX'
					local mingridY = round(r(min),0)
					if `MaxCluster' == 0 {
						 qui replace clusterID = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace developable = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace cell_total_emp = 999 if grid_x == `mingridX' & grid_y == `mingridY'
					}
					else {
						}
				qui sum clusterID if clusterID != .  // If there are too many clusters
					local MaxCluster = r(max) 
					qui sum grid_x 
					local mingridX = round(r(min),0)
					qui sum grid_y if grid_x ==  `mingridX'
					local mingridY = round(r(min),0)
					if `MaxCluster' > 50 {
						 qui replace clusterID = 0
						 qui replace clusterID = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace developable = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace cell_total_emp = 999 if grid_x == `mingridX' & grid_y == `mingridY'
					}
					else {
						}
			
			
			* DATA PREP ********************************************************
				qui ren cell_total_emp emp
			* Cluster employment relative to max PL employment in city
				qui gen TrimmedCluster = clusterID > 0								// Trimmed cluster variable
				qui egen Cemp = sum(emp), by(metro_id clusterID)
				qui egen Carea = count(emp), by(metro_id clusterID)
				qui gen Cempdens = 	Cemp  / Carea	
				qui egen CempMax = max(Cemp) if clusterID != 0, by(metro_id)		// Employment of largest cluster in metro
				qui gen CempRel = Cemp/CempMax 										// Cluster employment relative to largest cluster in metro
				qui replace TrimmedCluster = 0 if CempRel < $PLtresh				// Restrict to clusters with minimum relative employment

			* Ceneterate trimmed cluster variables	
				qui gen TrimmedClusterID = clusterID*TrimmedCluster if clusterID > 0 // Trimmed clusters identified
				qui egen TrimmedClusterX = mean(grid_x) if TrimmedCluster > 0, by(TrimmedClusterID metro_id)
				qui egen TrimmedClusterY = mean(grid_y) if TrimmedCluster > 0, by(TrimmedClusterID metro_id)		
	
			* AGGREGATION ******************************************************
				qui sum metro_id
				global MetroMin=r(min)
				global MetroMax=r(max)
				foreach num of numlist $MetroMin / $MetroMax { 					// This programme can process several metros. 
				local O = 999
				local i = 1
				while `O' > 0 {
				display "...round `i' of city `num'..."
				qui gen TrimmedClusterIDold = TrimmedClusterID 					// save old ID
				AGGCLUSTER2 `num' $DISTtresh
				qui gen test = abs(TrimmedClusterIDold-TrimmedClusterID) 		// generate objective
				qui sum test
				local O = r(sum)
				qui drop TrimmedClusterIDold test
				local i = `i'+1
				}
				}
				* Generate PL ID
				capture gen PLID = .											// gen PL ID variable
				qui replace PLID = TrimmedClusterID 

			* GENERATING CONVEX HULL *******************************************
				foreach num of numlist $MetroMin / $MetroMax { 					// loop over metros
				levelsof PLID if metro_id == `num', local(levels) 				// get list of PL IDs in a metro
				foreach pl of numlist `levels' {								// loop over PL IDs
					CH `num' `pl' 2
				}																// PL loop closes
				}																// metro loop closes
				
				* Clean temp folder 
				preserve 
					filelist, dir("$temp/PLtemp")
					levelsof filename, local(files)
					qui foreach f of local files{
						erase "$temp/PLtemp/`f'"
					}
				restore 
				
			* SPLIT PLs if parts are non-contiguous ****************************
				SPLITTER $SplitDist	
				
			* Generate PL variables	gen PL = PLID>0 & PLID != .
				qui gen PL = PLID>0 & PLID != .
				qui gen PL_x = grid_x if PL == 1
				qui gen PL_y = grid_y if PL == 1
				qui egen PL_emp = sum(emp) if PL == 1,by(metro_id PLID)
				qui egen PL_area = sum(area_g) if PL == 1,by(metro_id PLID)
				qui gen PL_empdens = PL_emp / PL_area
				qui egen PLX = mean(grid_x) if PLID > 0 & PLID != ., by(PLID metro_id)
				qui egen PLY = mean(grid_y) if PLID > 0 & PLID != ., by(PLID metro_id)		
				qui egen metro_emp = sum(emp) ,by(metro_id )

			* Flag as pseudo-PLs an drop unless the only one in city ***********************
				qui gen PPL = PL_emp < 10000  									// If PL has not enough employment
				preserve // Compute rank
				qui keep PL PPL PLID cbsafp PL_emp
					qui drop if PL == 0
					qui duplicates drop
					qui egen PLrankByCity = rank(PL_emp), by(cbsafp) field
					qui save "$temp/PLID_metro_rank_temp.dta", replace
				restore
				qui merge m:1 PLID cbsafp using "$temp/PLID_metro_rank_temp.dta", keepusing(PLrankByCity)	
				qui drop _m
				qui erase "$temp/PLID_metro_rank_temp.dta"
				qui  foreach var of varlist PL* {								// remove pseudo PLs, except if they are the larges PPL in the metro
					qui replace `var' = . if PPL == 1 & PLrankByCity != 1
				}
				qui replace PL = 0 if PL == .
				qui drop PL_emp // Now need to update PL_emp
				qui egen PL_emp = sum(emp) if PL == 1, by(cbsafp PLID)
				
			* Generate TS employment share within and outside PLs
				qui egen TSemp_insidePL = sum(cell_TS_emp) if PL == 1, by(cbsafp)
				qui egen TSemp_outsidePL = sum(cell_TS_emp) if PL == 0, by(cbsafp)
				foreach name in insidePL outsidePL {
					egen temp = mean(TSemp_`name'), by(cbsafp)
					drop TSemp_`name'
					ren temp TSemp_`name'
				}
				qui gen TSempshare_insidePL = TSemp_insidePL / TSemp_insidePL * 100
					label var TSempshare_insidePL "TS share within PLs (%)"
				qui gen TSempshare_outsidePL = TSemp_outsidePL / (metro_emp-TSemp_insidePL) *100
					label var TSempshare_outsidePL "TS share outside PLs (%)"
			* MAP PL
				local title = metro_name[1]
				twoway  	(scatter grid_y grid_x , msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.25)) ///
						(scatter grid_y grid_x if developable == 1, msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.4)) ///
						(scatter grid_y grid_x if emp > 0 , msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.75)) ///
						(scatter grid_y grid_x if  PL == 1 , msize(tiny)  mlcolor(none)  msymbol(s) mcolor(red*0.5)) ///
						(scatter PLY PLX , msize(tiny) msymbol(s)  mlcolor(none)  mcolor(red) mlabel(PLrankByCity) mlabcolor(red)) ///
						, legend(off) xsize(5) ysize(5) xlabel(0[10000]70000)		ylabel(0[10000]70000) graphregion(color(white))	title("`title'")
				capture mkdir "$temp/figures_App/US-CBSAs"
				capture mkdir "$temp/figures_App/US-CBSAs/ET${ET}"
				capture mkdir "$temp/figures_App/US-CBSAs/ET${ET}/MAPS"
				qui graph export "$temp/figures_App/US-CBSAs/ET${ET}/MAPS/MAPS_PL_TotalEmp_BaseP_`USmid'.png", replace height(1000) width(1000)
		
			
			* Save outputs *****************************************************
				qui drop TrimmedCluster Cemp Carea Cempdens CempMax CempRel TrimmedClusterID TrimmedClusterX TrimmedCluster
				qui save "$temp/grid_PL_output_ET${ET}/grid_PL_output_ET${ET}_`USmid'.dta", replace
				qui keep if PL == 1
				qui save "$temp/grid_PL_output_ET${ET}/PL_grids_ET${ET}_`USmid'", replace
			*Reset counter
				local `count' = `count' +1
			}
	* Loop ends
	
* Append outputs of loops into metro-grid files
		display "...compiling employment metro-grid data..."
		display `USMETROIDS'
		clear
		foreach USmid of local USMETROIDS{
			qui append using "$temp/grid_PL_output_ET${ET}/grid_PL_output_ET${ET}_`USmid'.dta"
			}	
			qui save "$temp/grid_PL_output_ET${ET}/grid_PL_output_ET${ET}__all.dta", replace
		clear 
		foreach USmid of local USMETROIDS{
			qui append using "$temp/grid_PL_output_ET${ET}/PL_grids_ET${ET}_`USmid'"
			}	
			qui keep if PL == 1
			qui save "$temp/grid_PL_output_ET${ET}/PL_grids_ET${ET}__all.dta", replace

* Script ends
